Search Google Images (with the image type filter set to "Face") for each of the following queries:
"jisoo blackpink" "jennie blackpink" "Rosé blackpink" "lisa blackpink"
Scroll down to load more images until the "Show more results" button appears, then paste the following into the browser's JavaScript console:
// Collect the `ou` field (the image URL) from the JSON metadata of every loaded search result.
urls = Array.from(document.querySelectorAll('.rg_di .rg_meta')).map(el => JSON.parse(el.textContent).ou);
// Open the newline-separated list as a CSV data URI. encodeURIComponent is used instead of the
// deprecated escape(), which emits non-standard %uXXXX sequences for non-Latin-1 characters.
window.open('data:text/csv;charset=utf-8,' + encodeURIComponent(urls.join('\n')));
Save the resulting list of URLs as a CSV file.
https://www.pinterest.ca/lulamulala/blackpink-jisoo/social-media/
https://www.pinterest.ca/lulamulala/blackpink-jisoo/photoshoot/
https://www.pinterest.ca/lulamulala/blackpink-jennie/social-media/
https://www.pinterest.ca/lulamulala/blackpink-jennie/photoshoot/
https://www.pinterest.ca/lulamulala/blackpink-rose/social-media/
https://www.pinterest.ca/lulamulala/blackpink-rose/photoshoot/
https://www.pinterest.ca/lulamulala/blackpink-lisa/social-media/
https://www.pinterest.ca/lulamulala/blackpink-lisa/photoshoot/
import fastai.vision
from fastai.vision import *
from fastai.metrics import *
import numpy as np
import os
import re
import shutil
import cv2 as cv
import matplotlib.pyplot as plt
from pathlib import Path
# Class labels; each one has a sub-folder of raw downloads under `original_path`.
classes = ["jisoo", "jennie", "rose", "lisa"]
path = Path('data')
frontal_face_cascade = cv.CascadeClassifier(cv.data.haarcascades + "haarcascade_frontalface_default.xml")
original_path = Path("original_data")

# Validation-set selection criteria: exactly one detected frontal face, a
# roughly square image, and at most this many images per class.
max_valid_per_class = 200
max_aspect_ratio = 1.2

for c in classes:
    n = 0  # number of validation images selected so far for this class
    print(c)
    orig = original_path/c
    image_paths = orig.ls()
    for image_path in image_paths:
        # Load as BGR colour. The original passed cv.COLOR_BGR2GRAY as the
        # imread flag, but that constant is a cvtColor conversion code, not
        # an imread mode, so no grayscale image was ever produced.
        image = cv.imread(image_path.as_posix())
        if image is None:
            # Unreadable or non-image file: skip it.
            continue

        # The detector below was called with an undefined name `gray`
        # (NameError); build the grayscale image explicitly.
        gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)

        height, width = image.shape[0:2]
        short_dim = min(height, width)
        long_dim = max(height, width)
        frontal_faces = frontal_face_cascade.detectMultiScale(
            gray, scaleFactor=1.3, minNeighbors=5)

        # Want 200 frontal-face-containing and somewhat square images per
        # class. `n < max` (not `<=`) so the cap is exactly 200, not 201.
        is_clean = (len(frontal_faces) == 1
                    and long_dim / short_dim < max_aspect_ratio
                    and n < max_valid_per_class)
        if is_clean:
            n += 1
            target = f"data/valid/{c}/{c}_{n}_{image_path.name}"
        else:
            target = f"data/train/{c}/{c}_{image_path.name}"
        print(target)

        # The copy is intentionally left disabled (as in the original), so
        # this loop only prints the planned destination of every image.
        # shutil.copy(image_path.as_posix(), target)

# I wanted to clean the entire dataset, but I guess its okay if the training set is noisy.
# Abandoned face-centering / filtering ideas, kept for reference:
# center = (frontal_faces[0][1] + frontal_faces[0][3],
# frontal_faces[0][0] + frontal_faces[0][2])
# elif len(frontal_faces)>1:
# print(image_path.as_posix() + " MANY FACES")
# elif len(frontal_faces)==0:
# if np.argmin(image.shape[0:2]) == 0: #image is wide
# Data-augmentation settings handed to fastai's `get_transforms`.
# The original author's notes mark every value as the library default
# (NOTE(review): the note for max_warp said "default 0.1"; confirm against
# the fastai v1 docs).
augmentation_settings = dict(
    do_flip=True,
    flip_vert=False,
    max_rotate=10,
    max_zoom=1.1,
    max_lighting=0.2,
    max_warp=0.1,
    p_affine=0.75,
    p_lighting=0.75,
)
transforms = get_transforms(**augmentation_settings)

# Select CUDA device index 1 and fix the NumPy RNG seed.
# NOTE(review): device index 1 is hard-coded for the original machine.
torch.cuda.set_device(1)
np.random.seed(42)

# Build the data bunch from the `train` / `valid` sub-folders of `path`,
# resizing to 336 px with batches of 32, then normalise with ImageNet stats.
# (A disabled `.filter_by_func(lambda fname:"Blackpink" in Path(fname).as_posix())`
# step used to sit between these two calls.)
data = ImageDataBunch.from_folder(
    path,
    train="train",
    valid="valid",
    ds_tfms=transforms,
    size=336,
    bs=32,
)
data = data.normalize(imagenet_stats)

# Visual sanity check: first the validation set, then the training set.
data.show_batch(rows=3, figsize=(7, 8), ds_type=DatasetType.Valid)
data.show_batch(rows=3, figsize=(7, 8))
# ResNet-50 learner on `data`, reporting the error rate.
learn = cnn_learner(data, models.resnet50, metrics=error_rate)

# Learning-rate range test and its loss-vs-lr plot.
learn.lr_find()
learn.recorder.plot()

# Learning rate used for the one-cycle run below.
lr = 5e-3
learn.fit_one_cycle(10, max_lr=lr)
learn.recorder.plot_losses()
learn.show_results(rows=10, shuffle=True)

# Inspect where the trained model goes wrong.
interp = ClassificationInterpretation.from_learner(learn)
losses, idxs = interp.top_losses()
interp.plot_confusion_matrix()
interp.plot_top_losses(20, figsize=(15, 11))
interp.most_confused(min_val=2)

# Save the trained weights under a name recording date, architecture,
# epoch count and error.
model_path = Path("../../models")
checkpoint_name = "2019-06-28_RESNET50_10epoch_0.11error"
learn.save(model_path/checkpoint_name)
# Run the trained learner on every file in predict/nick_wang and display
# each image titled with the predicted class and its probability.
image_paths = Path("predict/nick_wang").ls()
for image_path in image_paths:
    image = open_image(image_path)
    pred_class, pred_idx, outputs = learn.predict(image)
    # Probability the model assigned to the predicted class.
    probability = outputs[pred_idx.item()].item()
    title = f"{pred_class}. Probability = {probability}"
    image.show(figsize=(4, 4), title=title)
# IPython shell escape (valid only inside a notebook/IPython session, not plain Python):
# converts the `blackpink-classifier` notebook to HTML, writing the result
# under nbs/ with the base name 2019-06-28_RESNET50_0.11error.
!jupyter nbconvert blackpink-classifier --to html --output nbs/2019-06-28_RESNET50_0.11error